********************************************************************************
***Clean and reconcile make, model, year values (collected by 3 MTurkers)

use "data/inputs/mturk/makemodel_to_carvalues.dta", clear

* Adjust to uppercase to avoid merge issues
foreach m in make model {
	replace inputcar_`m' = upper(inputcar_`m')
}

foreach var of varlist input* {
	local newname = subinstr("`var'", "input", "", .)
	rename `var' `newname'
}

* Process string variable input and clean to obtain vehicle values 
foreach var of varlist answer* {
	
	*remove commas, because they separate our numbers
	gen `var'n = subinstr(`var', ",", "", .)

	*check those with characters in them
	gen `var'char = regexm(`var', "[a-zA-Z]")
	gen `var'num = regexm(`var', "[0-9]")
	
	*make missing those with characters and no numbers
	replace `var'n = "" if `var'char == 1 & `var'num == 0
	
	*see where there are characters and numbers mixed
	list `var'n if `var'char == 1 & `var'num == 1

	*manually recode entry to missing for responses reporting no value available
	replace `var'n = "" if `var'char == 1 & (regexm(`var'n,"1988") | regexm(`var'n,"1990"))
	
	*check again	
	list `var'n if `var'char == 1 & `var'num == 1
	
	*remove the cases of .00 at the end of a string
	replace `var'n = subinstr(`var'n, ".00", "", .)
	
	*remove periods
	replace `var'n = subinstr(`var'n, ".", "", .)
	
	*pull out numbers
	gen `var'num1 = regexs(0) if(regexm(`var'n, "[0-9]+"))

	*rename removing "answer"
	local varname = subinstr("`var'", "answer", "", .)
	di "`varname'"
	rename `var'num1 `varname'
	
	destring `varname', replace
	
	replace `varname' = . if inlist(`varname',0,1)
	
	bysort car_make car_model car_year: egen mymakemodelyear`varname' = median(`varname')
	
	* Replace vehicle values about $100k as missing
	local upperlimit = 100000
	replace mymakemodelyear`varname' = . ///
		if mymakemodelyear`varname' > `upperlimit' & !missing(mymakemodelyear`varname')
	
}

* Reshape answer wide for comparison
keep car_make car_model car_year mileage ///
	answervalue_private answervalue_retail answervalue_tradein my*
bysort car_make car_model car_year: gen nval = _n
reshape wide answervalue_private answervalue_retail answervalue_tradein, ///
	i(car_make car_model car_year) j(nval)
	
* Retain all values for manual inspection against median value
gen answervalue_private = ""
gen answervalue_retail = ""
gen answervalue_tradein = ""
foreach var of varlist answervalue_private answervalue_retail answervalue_tradein {
	forval i=1/6 {
		replace `var' = `var' + `var'`i' + ", " ///
		if `var'`i' != ""
	}
}

keep car_make car_model car_year mileage ///
	answervalue_private answervalue_retail answervalue_tradein my*

drop if mi(car_model)

isid car_make car_model car_year

keep car_make car_model car_year mileage mymake*

foreach v in private retail tradein {
	rename mymakemodelyearvalue_`v' makemodelyearvalue_`v'
}

isid car_make car_model car_year

save "data/int/MTurk MMY Clean.dta", replace


